

###############################################
########## JEPS OPTIMAL PERSUASION ############
###############################################

## Optimal Persuasion under Confirmation Bias: Theory and Evidence from a Registered Report
## Journal of Experimental Political Science
## Author: Love Christensen
## website: lovechristensen.com
## Contact: love.christensen@gmail.com

################################################
############ REPLICATION MATERIAL ##############
################################################

######### FILE 1: DATA PREP AND MODELS #########

## load libraries

## for analysis
require(tidyverse)
require(readstata13)
require(estimatr)
require(car)

## for graphs
require(data.table)
require(tikzDevice)
require(gridExtra)
require(ggpubr)


#####################################################
############ BEFORE RUNNING THE SCRIPT ##############
#####################################################

### For the script to run successfully, please ensure that the following folder structure/work flow is in place
## 1. Set working directory to the folder titled "script"
## 2. Make sure that the parent directory to the script folder contains
# (a) a folder titled "data", containing the data set
# (b) a folder titled "output" which itself contains two folders titled "tables" and "figures"

####################################################
####################################################
####################################################

## This sets the working directory to the source file location (requires that the code is run in the Rstudio IDE)
## Otherwise, set this manually by pasting the the path to the script folder
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

## Read the experimental data
dat <- read.dta13("../data/opcb_080221.dta")

#################################################
################### DATA PREP ###################
#################################################

### Define and clean the outcome variables
## 1. Round the numeric outcome and the treatment variables to the first decimal to get rid of the quirks of the numerical variables from qualtrics
## 2. Fix the scale of prediction credibility so that it runs from 0 to 10 and not 1 to 11

dat <- dat %>%
  mutate(diff = round(tpp_post_quant_1 - tpp_prior_quant_1, 1), # updating
         prediction_cred = prediction_cred - 1, # credibility
         prediction = round(prediction, 1),
         preddistvalue = round(preddistvalue, 1),
         tpp_prior_quant_1 = round(tpp_prior_quant_1, 1),
         prior = round(tpp_prior_quant_1, 1),
         tpp_post_quant_1 = round(tpp_post_quant_1, 1))

## Note that *diff* is the updating outcome variable and *prediction_cred* the credibility perception outcome
## The online appendix contains details on all measurement


## Generate the binned treatment variable
# Need -1 as a break to ensure that respondents with prediction distance = 0 is included
dat <- dat %>%
  mutate(preddist_cut = cut(preddist, breaks = c(-1, seq(5, 30, 5))))

## Generate the variables for the heterogeneity analysis
# 1. copartisan (w/ leaners defined as partisans)
# 2. dummy for certain prior
# 3. partisanship dummies
# 4. partisan sender
dat <- dat %>%
  mutate(copartisan = ifelse((pid == 1 & party == "Republicans in Congress") |
                             (pid == 2 & party == "Democrats in Congress") | 
                             (pid_lean == 1 & party == "Republicans in Congress") |
                             (pid_lean == 2 & party == "Democrats in Congress"), 1, 0),
         copartisan = replace_na(copartisan, 0),
         copartisan = ifelse(is.na(pid) & is.na(pid_lean), NA, copartisan),
         certain = ifelse(tpp_prior_cert >= 5, 1, 0),
         republican = ifelse(pid == 1, 1, 0),
         democrat = ifelse(pid == 2, 1, 0),
         independent = ifelse(pid %in% c(3,4), 1, 0),
         rep_sender = ifelse(party == "Republicans in Congress", 1, 0),
         dem_sender = ifelse(party == "Democrats in Congress", 1, 0))


#######################################################
###################### ANALYSIS #######################
#######################################################

## MODEL ORDER:

# 1. Table 2: "Effect of Prediction Distance on Updating and Uncertainty"
# 2. Figure 5: "Non-Continuous Effects on Updating, Perceived Credibility and Support"
# 3. Appendix Figure 3: "Effect of Treatment on Prior Belief and Prior Certainty"
# 4. Appendix Figure 5: "Effects of the Factorized Prediction Distance Variable"
# 5. Appendix Figure 6: "Copartisan Heterogeneity"
# 6. Appendix Table 1: "Heterogeneity: Copartisan Sender"
# 7. Appendix Figure 7: "Partisan Heterogeneity"
# 8. Appendix Figure 8: "Prior Certainty Heterogeneity"
# 9. Appendix Table 2: "Heterogeneity: Prior Certainty"
# 10. Appendix Table 3: "Dropping Potential Floor Observations"
# 11. Appendix Table 4: "Controlling for Prior Beliefs"
# 12. Updating heterogeneity (NO TABLE OR FIGURE)
# 13. CONSORT Flow Diagram Numbers
# 14. Chi^2 Test for examining randomization

#######################################################
###################### TABLE  2 #######################
#######################################################

## "Effect of Prediction Distance on Updating and Uncertainty"

###################### UPDATING ########################

m1_u <- lm_robust(diff ~ I(preddistvalue),
                  se = "stata",
                  data = dat)
m1_u_aic <- AIC(lm(diff ~ I(preddistvalue), data = dat))

m2_u <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2),
                  se = "stata",
                  data = dat)
m2_u_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2), data = dat))

m3_u <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3),
                  se = "stata",
                  data = dat)
m3_u_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3), data = dat))

m6_u <- lm_robust(diff ~ I(preddistvalue) + (preddistvalue == 0),
                  se = "stata",
                  data = dat)
m6_u_aic <- AIC(lm(diff ~ I(preddistvalue) + (preddistvalue == 0), 
                   data = dat))

m7_u <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2) + (preddistvalue == 0),
                  se = "stata",
                  data = dat)

m7_u_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + (preddistvalue == 0), 
                   data = dat))

m8_u <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3) + (preddistvalue == 0),
                  se = "stata",
                  data = dat)
m8_u_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3) + (preddistvalue == 0), 
                   data = dat))

###################### CREDIBILITY ########################

m1_c <- lm_robust(prediction_cred ~ I(preddistvalue),
                  se = "stata",
                  data = dat)
m1_c_aic <- AIC(lm(prediction_cred ~ I(preddistvalue), data = dat))

m2_c <- lm_robust(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2),
                  se = "stata",
                  data = dat)
m2_c_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2), data = dat))

m4_c <- lm_robust(prediction_cred ~ I(preddistvalue)  + (preddistvalue == 0),
                  se = "stata",
                  data = dat)
m4_c_aic <- AIC(lm(prediction_cred ~ I(preddistvalue)  + (preddistvalue == 0), dat))

m5_c <- lm_robust(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2)  + (preddistvalue == 0),
                  se = "stata",
                  data = dat)
m5_c_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2)  + (preddistvalue == 0), data = dat))


#####################################################
##################### FIGURE 5 ######################
#####################################################

## "Non-Continuous Effects on Updating, Perceived Credibility and Support"

m4_u <- lm_robust(diff ~ factor(preddist_cut),
                  se = "stata",
                  data = dat)

m3_c <- lm_robust(prediction_cred ~ factor(preddist_cut),
                  se = "stata",
                  data = dat)

m1_a <- lm_robust(tpp_attitude ~ factor(preddist_cut),
                  se = "stata",
                  data = dat)

#############################################
############# APPENDIX FIGURE 3 #############
#############################################

## "Effect of Treatment on Prior Belief and Prior Certainty"

m1_priors <- lm_robust(tpp_prior_quant_1 ~ factor(preddist),
                       se = "stata",
                       data = dat)

m2_priors <- lm_robust(tpp_prior_cert ~ factor(preddist),
                       se = "stata",
                       data = dat)


#####################################################
################ APPENDIX FIGURE 5 ##################
#####################################################

## "Effects of the Factorized Prediction Distance Variable"

m10_u <- lm_robust(diff ~ factor(preddist),
                   se = "stata",
                   data = dat)

m10_c  <- lm_robust(prediction_cred ~ factor(preddist),
                    se = "stata",
                    data = dat)

m10_a <- lm_robust(tpp_attitude ~ factor(preddist),
                   se = "stata",
                   data = dat)


#################################################
############### APPENDIX FIGURE 6 ###############
#################################################

## "Copartisan Heterogeneity"

m1_up <- lm_robust(diff ~ factor(preddist_cut) * (copartisan == 1) +
                     factor(pid),
                  se = "stata",
                  data = dat)

m1_pc <- lm_robust(prediction_cred ~ factor(preddist_cut) * (copartisan == 1) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m1_pa <- lm_robust(tpp_attitude ~ factor(preddist_cut) * (copartisan == 1) +
                     factor(pid),
                   se = "stata",
                   data = dat)

##################################################
################ APPENDIX TABLE 1 ################
##################################################

## "Heterogeneity: Copartisan Sender"

m2_pu <- lm_robust(diff ~ I(preddistvalue) * (copartisan == 1) + (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m2_pu_aic <- AIC(lm(diff ~ I(preddistvalue) * (copartisan == 1) + 
                      (preddistvalue == 0) +
                      factor(pid),
                    data = dat))

m3_pu <- lm_robust(diff ~ I(preddistvalue) * (copartisan == 1) + 
                     I(preddistvalue^2) * (copartisan == 1) +
                     (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m3_pu_aic <- AIC(lm(diff ~ I(preddistvalue) * (copartisan == 1) + 
                      I(preddistvalue^2) * (copartisan == 1) +
                      (preddistvalue == 0) +
                      factor(pid),
                    data = dat))

m4_pu <- lm_robust(diff ~ I(preddistvalue) * (copartisan == 1) + 
                     I(preddistvalue^2) * (copartisan == 1) +
                     I(preddistvalue^3) * (copartisan == 1) +
                     (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m4_pu_aic <- AIC(lm(diff ~ I(preddistvalue) * (copartisan == 1) + 
                      I(preddistvalue^2) * (copartisan == 1) +
                      I(preddistvalue^3) * (copartisan == 1) +
                      (preddistvalue == 0) +
                      factor(pid),
                    data = dat))


m2_pc <- lm_robust(prediction_cred ~ I(preddistvalue) * (copartisan == 1) 
                   + (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m2_pc_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) * (copartisan == 1) 
                    + (preddistvalue == 0) +
                      factor(pid),
                    data = dat))

m3_pc <- lm_robust(prediction_cred ~ I(preddistvalue) * (copartisan == 1) + 
                     I(preddistvalue^2) * (copartisan == 1) +
                     (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m3_pc_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) * (copartisan == 1) + 
                      I(preddistvalue^2) * (copartisan == 1) +
                      (preddistvalue == 0) +
                      factor(pid),
                    data = dat))


#####################################################
################# APPENDIX FIGURE 7 #################
#####################################################

## "Partisan Heterogeneity"

## republicans
m1_pr_u <- lm_robust(diff ~ factor(preddist_cut) * 
                       (rep_sender == 1),
                     se = "stata",
                     data = subset(dat, republican == 1))

m1_pr_c <- lm_robust(prediction_cred ~ factor(preddist_cut) * 
                       (rep_sender == 1),
                     se = "stata",
                     data = subset(dat, republican == 1))

m1_pr_a <- lm_robust(tpp_attitude ~ factor(preddist_cut) * 
                       (rep_sender == 1),
                     se = "stata",
                     data = subset(dat, republican == 1))

## independents
m1_pi_u <- lm_robust(diff ~ factor(preddist_cut) * 
                       (rep_sender == 1),
                     se = "stata",
                     data = subset(dat, independent == 1))

m1_pi_c <- lm_robust(prediction_cred ~ factor(preddist_cut) * 
                       (rep_sender == 1),
                     se = "stata",
                     data = subset(dat, independent == 1))

m1_pi_a <- lm_robust(tpp_attitude ~ factor(preddist_cut) * 
                       (rep_sender == 1),
                     se = "stata",
                     data = subset(dat, independent == 1))

## democrats
m1_pd_u <- lm_robust(diff ~ factor(preddist_cut) * 
                       (rep_sender == 1),
                     se = "stata",
                     data = subset(dat, democrat == 1))

m1_pd_c <- lm_robust(prediction_cred ~ factor(preddist_cut) * 
                       (rep_sender == 1),
                     se = "stata",
                     data = subset(dat, democrat == 1))

m1_pd_a <- lm_robust(tpp_attitude ~ factor(preddist_cut) * 
                       (rep_sender == 1),
                     se = "stata",
                     data = subset(dat, democrat == 1))

##############################################
############## APPENDIX FIGURE 8 #############
##############################################

## "Prior Certainty Heterogeneity"

m1_cu <- lm_robust(diff ~ factor(preddist_cut) * (certain == 1) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m1_cc <- lm_robust(prediction_cred ~ factor(preddist_cut) * (certain == 1) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m1_ca <- lm_robust(tpp_attitude ~ factor(preddist_cut) * (certain == 1) +
                     factor(pid),
                   se = "stata",
                   data = dat)


##################################################
################ APPENDIX TABLE 2 ################
##################################################

## "Heterogeneity: Prior Certainty"

m2_cu <- lm_robust(diff ~ I(preddistvalue) * (certain == 1) + (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m2_cu_aic <- AIC(lm(diff ~ I(preddistvalue) * (certain == 1) + 
                      (preddistvalue == 0) +
                      factor(pid),
                    data = dat))

m3_cu <- lm_robust(diff ~ I(preddistvalue) * (certain == 1) + 
                     I(preddistvalue^2) * (certain == 1) +
                     (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m3_cu_aic <- AIC(lm(diff ~ I(preddistvalue) * (certain == 1) + 
                      I(preddistvalue^2) * (certain == 1) +
                      (preddistvalue == 0) +
                      factor(pid),
                    data = dat))

m4_cu <- lm_robust(diff ~ I(preddistvalue) * (certain == 1) + 
                     I(preddistvalue^2) * (certain == 1) +
                     I(preddistvalue^3) * (certain == 1) +
                     (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m4_cu_aic <- AIC(lm(diff ~ I(preddistvalue) * (certain == 1) + 
                      I(preddistvalue^2) * (certain == 1) +
                      I(preddistvalue^3) * (certain == 1) +
                      (preddistvalue == 0) +
                      factor(pid),
                    data = dat))


m2_cc <- lm_robust(prediction_cred ~ I(preddistvalue) * (certain == 1) 
                   + (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m2_cc_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) * (certain == 1) 
                    + (preddistvalue == 0) +
                      factor(pid),
                    data = dat))

m3_cc <- lm_robust(prediction_cred ~ I(preddistvalue) * (certain == 1) + 
                     I(preddistvalue^2) * (certain == 1) +
                     (preddistvalue == 0) +
                     factor(pid),
                   se = "stata",
                   data = dat)

m3_cc_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) * (certain == 1) + 
                      I(preddistvalue^2) * (certain == 1) +
                      (preddistvalue == 0) +
                      factor(pid),
                    data = dat))

########################################
########### APPENDIX TABLE 3 ###########
########################################

## "Dropping Potential Floor Observations"

# Exclude respondents with priors who could hit floor
dat_subs_rob <- dat %>% filter(tpp_prior_quant_1 >= -9.3)

m1_u_rob <- lm_robust(diff ~ I(preddistvalue),
                  se = "stata",
                  data = dat_subs_rob)
m1_u_rob_aic <- AIC(lm(diff ~ I(preddistvalue), data = dat_subs_rob))

m2_u_rob <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2),
                  se = "stata",
                  data = dat_subs_rob)
m2_u_rob_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2), 
                       data = dat_subs_rob))

m3_u_rob <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3),
                  se = "stata",
                  data = dat_subs_rob)
m3_u_rob_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3), 
                       data = dat_subs_rob))

m6_u_rob <- lm_robust(diff ~ I(preddistvalue) + (preddistvalue == 0),
                  se = "stata",
                  data = dat_subs_rob)
m6_u_rob_aic <- AIC(lm(diff ~ I(preddistvalue) + (preddistvalue == 0), 
                   data = dat_subs_rob))

m7_u_rob <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2) + (preddistvalue == 0),
                  se = "stata",
                  data = dat_subs_rob)

m7_u_rob_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + (preddistvalue == 0), 
                   data = dat_subs_rob))

m8_u_rob <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3) + (preddistvalue == 0),
                  se = "stata",
                  data = dat_subs_rob)
m8_u_rob_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3) + (preddistvalue == 0), 
                   data = dat_subs_rob))

m1_c_rob <- lm_robust(prediction_cred ~ I(preddistvalue),
                  se = "stata",
                  data = dat_subs_rob)
m1_c_rob_aic <- AIC(lm(prediction_cred ~ I(preddistvalue), 
                       data = dat_subs_rob))

m2_c_rob <- lm_robust(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2),
                  se = "stata",
                  data = dat_subs_rob)
m2_c_rob_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2), 
                       data = dat_subs_rob))

m4_c_rob <- lm_robust(prediction_cred ~ I(preddistvalue)  + (preddistvalue == 0),
                  se = "stata",
                  data = dat_subs_rob)
m4_c_rob_aic <- AIC(lm(prediction_cred ~ I(preddistvalue)  + (preddistvalue == 0), 
                       dat))

m5_c_rob <- lm_robust(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2)  + (preddistvalue == 0),
                  se = "stata",
                  data = dat_subs_rob)
m5_c_rob_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2)  + (preddistvalue == 0), 
                       data = dat_subs_rob))


########################################
########### APPENDIX TABLE 4 ###########
########################################

## "Controlling for Prior Beliefs"

# Include fixed effects for prior beliefs
m1_u_prior <- lm_robust(diff ~ I(preddistvalue),
                      se = "stata",
                      fixed_effects = tpp_prior_quant_1,
                      data = dat)
m1_u_prior_aic <- AIC(lm(diff ~ I(preddistvalue) + factor(tpp_prior_quant_1), 
                         data = dat))

m2_u_prior <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2),
                        se = "stata",
                        fixed_effects = tpp_prior_quant_1,
                        data = dat)

m2_u_prior_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + tpp_prior_quant_1, 
                         data = dat))

m3_u_prior <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3),                                    se = "stata",
                        fixed_effects = tpp_prior_quant_1,
                        data = dat)
  
m3_u_prior_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3) + factor(tpp_prior_quant_1), data = dat))

m6_u_prior <- lm_robust(diff ~ I(preddistvalue) + (preddistvalue == 0),
                        se = "stata",
                        fixed_effects = tpp_prior_quant_1,
                        data = dat)

m6_u_prior_aic <- AIC(lm(diff ~ I(preddistvalue) + (preddistvalue == 0) + factor(tpp_prior_quant_1)+ (preddistvalue == 0) , 
                       data = dat))

m7_u_prior <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2) + (preddistvalue == 0) + factor(tpp_prior_quant_1),
                        se = "stata",
                        fixed_effects = tpp_prior_quant_1,
                        data = dat)

m7_u_prior_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + (preddistvalue == 0) + factor(tpp_prior_quant_1), 
                       data = dat))

m8_u_prior <- lm_robust(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3) + (preddistvalue == 0) + factor(tpp_prior_quant_1),
                        se = "stata",
                        fixed_effects = tpp_prior_quant_1,
                        data = dat)

m8_u_prior_aic <- AIC(lm(diff ~ I(preddistvalue) + I(preddistvalue^2) + I(preddistvalue^3) + (preddistvalue == 0) + factor(tpp_prior_quant_1), 
                       data = dat))

m1_c_prior <- lm_robust(prediction_cred ~ I(preddistvalue),
                        se = "stata",
                        fixed_effects = tpp_prior_quant_1,
                        data = dat)
m1_c_prior_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) + factor(tpp_prior_quant_1), data = dat))

m2_c_prior <- lm_robust(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2),
                        se = "stata",
                        fixed_effects = tpp_prior_quant_1,
                        data = dat)
m2_c_prior_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2) + factor(tpp_prior_quant_1), data = dat))

m4_c_prior <- lm_robust(prediction_cred ~ I(preddistvalue)  + (preddistvalue == 0),
                        se = "stata",
                        fixed_effects = tpp_prior_quant_1,
                        data = dat)

m4_c_prior_aic <- AIC(lm(prediction_cred ~ I(preddistvalue)  + (preddistvalue == 0) + factor(tpp_prior_quant_1), dat))

m5_c_prior <- lm_robust(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2)  + (preddistvalue == 0),
                        se = "stata",
                        fixed_effects = tpp_prior_quant_1,
                        data = dat)

m5_c_prior_aic <- AIC(lm(prediction_cred ~ I(preddistvalue) + I(preddistvalue^2)  + (preddistvalue == 0) + factor(tpp_prior_quant_1), data = dat))



###################################################################
########## UPDATING HETEROGENEITY (NO TABLE OR FIGURE) ############
###################################################################

## This is the analysis for the information on heterogeneity in updating discussed in connection to footnote 12 in the last paragraph before the conclusion.

## First, we define the denominator
## Because of the way I define respondents as not responding or perfect followers, we need to exclude respondents with preddist >= 2
## Denominator:
n_total <- dat %>% filter(!is.na(diff) & preddist > 2) %>% nrow()

## Define null updaters as +- 0.1 million jobs
n_defiers <- dat %>% filter(abs(diff) < 0.1 & preddist > 2) %>% nrow()
dat %>% filter(abs(diff) < 0.1 & preddist > 2) %>% select(preddist) %>% table
n_defiers / n_total

## Define perfect compliers
## As expected, the number of perfect compliers decrease as the predictions grow more extreme
dat %>% filter(abs(abs(diff) - preddistvalue) < 0.2  & preddist > 2) %>% select(preddist) %>% table

## Ratio:
n_compliers <- dat %>% filter(abs(abs(diff) - preddistvalue) < 0.1 & preddist > 2) %>% select(preddist) %>% nrow
n_compliers / n_total


#########################################################
############## CONSORT PARTICIPANT FLOW DIAGRAM #########
#########################################################

## total number of respondents who started taking the survey
N1 <- dat %>% nrow()

## total number of respondents who passed the attentiveness screeners
N2 <- dat %>% 
  filter(screener_1 == 2 & screener_2 == 5) %>% 
  nrow()

## received treatment: a reason for not receiving treatment is that the respondents did not provide answers about their prior beliefs
N3 <- dat %>% 
  filter(screener_1 == 2 & screener_2 == 5) %>% 
  filter(!is.na(preddist)) %>% 
  nrow()

## answered updating question
N4 <- dat %>% 
  filter(screener_1 == 2 & screener_2 == 5) %>% 
  filter(!is.na(preddist)) %>% 
  filter(!is.na(diff)) %>%
  nrow()

## answered credibility question
N5 <- dat %>% 
  filter(screener_1 == 2 & screener_2 == 5) %>% 
  filter(!is.na(preddist)) %>% 
  filter(!is.na(prediction_cred)) %>%
  nrow()

## answered both
N6 <- dat %>% 
  filter(screener_1 == 2 & screener_2 == 5) %>% 
  filter(!is.na(preddist)) %>% 
  filter(!is.na(diff) & !is.na(prediction_cred)) %>%
  nrow()


###########################################################
############## CHI2 TEST FOR TREATMENT ASSIGNMENT #########
###########################################################

## counts the number of assigned respondents in each treatment category
## tests whether counts are independent of treatment value
chisq.test(table(subset(dat, !is.na(preddist))$preddist))
